# -*- coding: utf-8 -*-
"""
Created on Fri Oct 20 22:18:55 2017

@author: Luther
"""

import requests
from bs4 import BeautifulSoup
import time


def getHTMLText(url):
    """Fetch *url* and return its body decoded with the guessed encoding.

    Returns the sentinel string 'error' on any request failure (callers
    compare against 'error', so the sentinel is kept for compatibility).
    """
    print('正在连接网络...')
    try:
        # timeout so a dead server cannot hang the script forever
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # apparent_encoding sniffs the body; more reliable than the HTTP
        # header for Chinese pages that omit/mislabel charset
        r.encoding = r.apparent_encoding
        print('连接成功！')
        return r.text
    except requests.RequestException:
        # was a bare `except:` — it also swallowed KeyboardInterrupt/SystemExit;
        # narrow to network/HTTP errors only
        print('产生异常！')
        return 'error'


def getUniList(uni_list, text):
    """Parse the ranking table in *text* and append one CSV line per row.

    Each appended line holds the first four <td> cells (rank, name,
    province, score). Returns the same list for convenience.
    """
    print('正在解析页面...')
    soup = BeautifulSoup(text, 'lxml')
    for row in soup.tbody.find_all('tr'):
        cells = row.find_all('td')
        line = ','.join('{}'.format(cells[k].string) for k in range(4))
        uni_list.append(line + '\n')
    print('解析成功！')
    return uni_list


def saveUniList(uni_list, path='D:\\uni_list.csv'):
    """Write every CSV line in *uni_list* to *path*, showing a progress bar.

    Bug fix: the original reopened the file with mode 'w' INSIDE the loop,
    truncating it on every iteration so only the last row survived. The
    file is now opened exactly once, outside the loop.

    *path* is a new parameter defaulting to the original hard-coded
    location, so existing callers are unaffected.
    """
    print('正在写入文件...')
    total = len(uni_list)  # hoisted out of the loop
    # explicit utf-8 so the Chinese header row is written consistently
    # regardless of the platform's locale encoding
    with open(path, 'w', encoding='utf-8') as f:
        for count, line in enumerate(uni_list, 1):
            f.write(line)
            print('\r当前进度：{:.2f}%'.format(100 * count / total), end='')
    print('\n中国最好大学排名文件已生成，位于' + path)


def main():
    """Drive the scrape: fetch the page, parse it, save the CSV.

    Reads the module-level ``url``. Does nothing when the fetch fails
    (``getHTMLText`` returns the 'error' sentinel).
    """
    page = getHTMLText(url)
    if page == 'error':
        return
    rows = getUniList(['排名,学校名称,省市,总分\n'], page)
    saveUniList(rows)


# Guard the script entry point so importing this module does not
# immediately prompt for input and hit the network (original ran these
# statements unconditionally at import time).
if __name__ == '__main__':
    url = input('请键入url:')

    time1 = time.time()
    main()
    time2 = time.time()
    d_time = time2 - time1
    print('共用时{:.2f}s'.format(d_time))
